library(dplyr)
library(tidyr)
library(ggplot2)
library(zoo)
library(DiagrammeR)
library(gridExtra)
library(png)

######################
###Correlation plot### 
######################
# Preprocess agricultural data (med.dat.ag) and forest data (med.dat.for)
med.dat.ag <- na.omit(full.2003.2018 %>%
                        filter(ag_crops == 1) %>%
                        select("gid", "clear_zoon_confirm", "lagclear_zoon_confirm", "Forest.coverage_bin",
                               "ag_crops", "fires.cell.count_anom", "NDVI_mean_anom", "year", "t", 
                               "log.nl", "logged.pop", "month"))

med.dat.for <- na.omit(full.2003.2018 %>%
                         filter(ag_crops == 0) %>%
                         select("gid", "clear_zoon_confirm", "lagclear_zoon_confirm", "Forest.coverage_bin",
                                "ag_crops", "fires.cell.count_anom", "NDVI_mean_anom", "year", "t", 
                                "log.nl", "logged.pop", "month"))

# Create dummy variables for months
unique_month <- unique(med.dat.ag$month)
for (month in unique_month) {
  new_col_name <- paste0("f_month", month)  # Create a column name
  med.dat.ag[[new_col_name]] <- ifelse(med.dat.ag$month == month, 1, 0)  # Assign 1 if matched, else 0
  med.dat.for[[new_col_name]] <- ifelse(med.dat.for$month == month, 1, 0)  # Same for forest data
}

# Create lagged variables function
create_lags <- function(df, lag_months) {
  df %>%
    group_by(gid) %>%
    arrange(t) %>%
    mutate(across(fires.cell.count_anom, ~lag(.x, n = lag_months), 
                  .names = "fire_lag_{.col}_{.fn}")) %>%
    ungroup()
}

# Lags we want to compute (in months)
#lags <- c(0,4,8,12)
lags <- c(0,1,2,4,8)


# Create lagged variables for both datasets (agriculture and forest)
for (l in lags) {
  med.dat.ag[[paste0("fire_lag_", l)]] <- med.dat.ag %>%
    group_by(gid) %>%
    arrange(t) %>%
    mutate(lagged = lag(fires.cell.count_anom, n = l)) %>%
    pull(lagged)
  
  med.dat.for[[paste0("fire_lag_", l)]] <- med.dat.for %>%
    group_by(gid) %>%
    arrange(t) %>%
    mutate(lagged = lag(fires.cell.count_anom, n = l)) %>%
    pull(lagged)
}

# Calculate correlation for agricultural zones (med.dat.ag)
cor_results_ag <- lags %>%
  purrr::map_df(function(l) {
    cor_val <- cor(med.dat.ag[[paste0("fire_lag_", l)]], med.dat.ag$clear_zoon_confirm, use = "complete.obs")
    tibble(lag = l, correlation = cor_val, dataset = "Agricultural")
  })

# Calculate correlation for forest zones (med.dat.for)
cor_results_for <- lags %>%
  purrr::map_df(function(l) {
    cor_val <- cor(med.dat.for[[paste0("fire_lag_", l)]], med.dat.for$clear_zoon_confirm, use = "complete.obs")
    tibble(lag = l, correlation = cor_val, dataset = "Forest")
  })

# Combine results
cor_results <- bind_rows(cor_results_ag, cor_results_for)
names(cor_results) <- c("lag","correlation", "Land Use Type")

# Plot correlation figure
setwd("~/OneDrive - Indiana University/FromGoogle/syd/plots/")  # Set working directory for saving plot
png(filename = "corplots.png", width = 6, height = 6, units = 'in', res = 500)
ggplot(cor_results, aes(x = lag, y = correlation, color = `Land Use Type`)) +
  geom_line() + geom_point() +
  labs(title = "",
       x = "Fire anomalies lag (months)", y = "Pearson correlation") +
  scale_color_manual(values = c("Agricultural" = "blue", "Forest" = "forestgreen",name = "Land Use Type")) +  # Custom colors
  theme_minimal() +
  scale_y_continuous(labels = scales::label_number())  +# Ensures numeric y-axis labels
  theme(
    axis.text.y = element_text(size = 10), 
    axis.text.x = element_text(size = 10),
    legend.position = "none", #"right",
    panel.border = element_rect(color = "black", fill = NA)  # black frame
  )
dev.off()

########################
###Mediation lag plot###
########################
library(DataCombine)

########Lags
full.2003.2018 <- full.2003.2018[order(full.2003.2018$gid, full.2003.2018$t), ]
full.2003.2018 <- slide(full.2003.2018, Var="fires.cell.count_anom",TimeVar="t", 
                        GroupVar="gid", 
                        NewVar="lagfires.cell.count_anom", slideBy = -1,
                        keepInvalid = FALSE, reminder = TRUE)
full.2003.2018 <- slide(full.2003.2018, Var="fires.cell.count_anom",TimeVar="t", 
                        GroupVar="gid", 
                        NewVar="lag2fires.cell.count_anom", slideBy = -2,
                        keepInvalid = FALSE, reminder = TRUE)
full.2003.2018 <- slide(full.2003.2018, Var="fires.cell.count_anom",TimeVar="t", 
                        GroupVar="gid", 
                        NewVar="lag4fires.cell.count_anom", slideBy = -4,
                        keepInvalid = FALSE, reminder = TRUE)
full.2003.2018 <- slide(full.2003.2018, Var="fires.cell.count_anom",TimeVar="t", 
                        GroupVar="gid", 
                        NewVar="lag8fires.cell.count_anom", slideBy = -8,
                        keepInvalid = FALSE, reminder = TRUE)
full.2003.2018 <- slide(full.2003.2018, Var="fires.cell.count_anom",TimeVar="t", 
                        GroupVar="gid", 
                        NewVar="lag12fires.cell.count_anom", slideBy = -12,
                        keepInvalid = FALSE, reminder = TRUE)


#Prepare dat for analysis
# subset na rm data
med.dat <- na.omit(full.2003.2018[,c("gid","clear_zoon_confirm","lagclear_zoon_confirm","Forest.coverage_bin","ag_crops","fires.cell.count_anom","lagfires.cell.count_anom","lag2fires.cell.count_anom","lag4fires.cell.count_anom","lag8fires.cell.count_anom","lag12fires.cell.count_anom","NDVI_mean_anom","log.nl","logged.pop","month")])
#Create regular dummies for the mediate package
unique_month <- unique(med.dat$month)
#run loop 
for (month in unique_month) {
  new_col_name <- paste0("f_month", month)  # Create a column name
  med.dat[[new_col_name]] <- ifelse(med.dat$month == month, 1, 0)  # Assign 1 if matched, else 0
}

#########################
### Agricutlrual areas###
# subset only ag areas
med.dat.ag <- subset(med.dat, (ag_crops>0))
####Lag zero
#Run base model
med.fit0.l <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl +logged.pop+
                   f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit0.l)
#Run fit model
out.fit0.l <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl + logged.pop+
                   f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit0.l)
##Mediate
med.out0.l1 <- mediate(med.fit0.l, out.fit0.l, treat = "fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200)
summary(med.out0.l1)

####Lag one
#Run base model
med.fit1.l <- lm(NDVI_mean_anom ~ lagfires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit1.l)
#Run fit model
out.fit1.l <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lagfires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit1.l)
##Mediate
med.out1.l1 <- mediate(med.fit1.l, out.fit1.l, treat = "lagfires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.ag$gid)
summary(med.out1.l1)

####Lag two
#Run base model
med.fit2.l <- lm(NDVI_mean_anom ~ lag2fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit2.l)
#Run fit model
out.fit2.l <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag2fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit2.l)
##Mediate
med.out2.l1 <- mediate(med.fit2.l, out.fit2.l, treat = "lag2fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.ag$gid)
summary(med.out2.l1)

####Lag four
#Run base model
med.fit4.l <- lm(NDVI_mean_anom ~ lag4fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit4.l)
#Run fit model
out.fit4.l <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag4fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit4.l)
##Mediate
med.out4.l1 <- mediate(med.fit4.l, out.fit4.l, treat = "lag4fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.ag$gid)
summary(med.out4.l1)

####Lag eight
#Run base model
med.fit8.l <- lm(NDVI_mean_anom ~ lag8fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit8.l)
#Run fit model
out.fit8.l <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag8fires.cell.count_anom+
                   lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit8.l)
##Mediate
med.out8.l1 <- mediate(med.fit8.l, out.fit8.l, treat = "lag8fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.ag$gid)
summary(med.out8.l1)

################
##Forest areas##
#Subset only forest data
med.dat.for <- subset(med.dat, (Forest.coverage_bin>0))

####Lag zero
#Run base model
med.fit0.l2 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit0.l2)
#Run fit model
out.fit0.l2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit0.l2)
##Mediate
med.out0.l2 <- mediate(med.fit0.l2, out.fit0.l2, treat = "fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.for$gid)
summary(med.out0.l2)

####Lag one
#Run base model
med.fit1.l2 <- lm(NDVI_mean_anom ~ lagfires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit1.l2)
#Run fit model
out.fit1.l2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lagfires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit1.l2)
##Mediate
med.out1.l2 <- mediate(med.fit1.l2, out.fit1.l2, treat = "lagfires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.for$gid)
summary(med.out1.l2)

####Lag two
#Run base model
med.fit2.l2 <- lm(NDVI_mean_anom ~ lag2fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit2.l2)
#Run fit model
out.fit2.l2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag2fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit2.l2)
##Mediate
med.out2.l2 <- mediate(med.fit2.l2, out.fit2.l2, treat = "lag2fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.for$gid)
summary(med.out2.l2)

####Lag four
#Run base model
med.fit4.l2 <- lm(NDVI_mean_anom ~ lag4fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit4.l2)
#Run fit model
out.fit4.l2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag4fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit4.l2)
##Mediate
med.out4.l2 <- mediate(med.fit4.l2, out.fit4.l2, treat = "lag4fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.for$gid)
summary(med.out4.l2)

####Lag eight
#Run base model
med.fit8.l2 <- lm(NDVI_mean_anom ~ lag8fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl +logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit8.l2)
#Run fit model
out.fit8.l2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+lag8fires.cell.count_anom+
                    lagclear_zoon_confirm+log.nl + logged.pop+f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+                  f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit8.l2)
##Mediate
med.out8.l2 <- mediate(med.fit8.l2, out.fit8.l2, treat = "lag8fires.cell.count_anom", mediator = "NDVI_mean_anom",
                       sims = 200, cluster = med.dat.for$gid)
summary(med.out8.l2)

##########
###Plot###
# Define the lags and types
lags <- c(0, 1, 2, 4, 8)
types <- c("Agricultural", "Forest")
labels <- c("l1" = "Agricultural", "l2" = "Forest")

# Helper function to extract ACME estimate and CI
extract_acme <- function(x, y) {
  obj_name <- paste0("med.out", x, ".l", y)
  med_obj <- get(obj_name)
  
  est <- med_obj$d0
  lower <- quantile(med_obj$d0.sims, .05)
  upper <- quantile(med_obj$d0.sims, .95)
  
  data.frame(
    lag = x,
    type = labels[[paste0("l", y)]],
    acme = est,
    lower = lower,
    upper = upper
  )
}

# Create full data frame
results_df <- purrr::map_dfr(lags, function(x) {
  purrr::map_dfr(1:2, function(y) extract_acme(x, y))
})

# Order for plotting: forest then agriculture
results_df$type <- factor(results_df$type, levels = c("Forest", "Agricultural"))
results_df$lag <- factor(results_df$lag, levels = sort(unique(results_df$lag)))

# Create combined factor for x-axis: e.g. "0-forest", "0-agriculture", etc.
results_df$group <- factor(paste0(results_df$lag, "-", results_df$type),
                           levels = unlist(lapply(sort(unique(results_df$lag)), function(x) paste0(x, "-", c("Forest", "Agricultural")))))

# Add outcome means
results_df$OutcomeMean <- c(0.0003233732,0.0003629785,0.0003233732,0.0003629785,
                            0.0003233732,0.0003629785,0.0003233732,0.0003629785,
                            0.0003233732,0.0003629785)

# Convert to percents
results_df <- results_df %>%
  dplyr::mutate(
    acme_pct = 100 * acme / OutcomeMean,
    lower_pct = 100 * lower / OutcomeMean,
    upper_pct = 100 * upper / OutcomeMean
  )

# Define divider positions
divider_pos <- c(2.5, 4.5, 6.5, 8.5)

# Custom x-axis labels: show label on first bar of each lag pair
x_labels <- c(
  "0", "",  # lag 0
  "1", "",  # lag 1
  "2", "",  # lag 2
  "4", "",  # lag 4
  "8", ""   # lag 8
)


# Calculate the means of acme_pct for each group and type
mean_df <- results_df %>%
  group_by(group, type) %>%
  summarise(mean_acme_pct = mean(acme_pct), .groups = 'drop')

# Plot with geom_smooth for trend lines
setwd("~/OneDrive - Indiana University/FromGoogle/syd/plots/")
png(filename = "lagmed.png", width = 8, height = 6, units = 'in', res = 500)

ggplot(results_df, aes(x = group, y = acme_pct, color = type)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbar(aes(ymin = lower_pct, ymax = upper_pct), 
                width = 0.2, position = position_dodge(width = 0.5)) +
  geom_vline(xintercept = divider_pos, linetype = "dotted", color = "gray40") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey40", size = 0.6) +
  geom_smooth(method = "loess", aes(group = type), size = 0.5, linetype = "solid", se = FALSE) +  # Trend lines using smoothing
  labs(
    x = "Fire anomalies lag (months)", 
    y = "ACME (% change from outcome mean)", 
    color = "Type"
  ) +
  scale_x_discrete(labels = x_labels) +
  scale_y_continuous(labels = scales::label_percent(scale = 1)) +
  scale_color_manual(values = c("Agricultural" = "blue", "Forest" = "forestgreen")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 10), 
    axis.text.x = element_text(size = 10, hjust = -4),
    legend.position = "right",
    panel.border = element_rect(color = "black", fill = NA)
  )

dev.off()

#######################
###Conceptual figure###
graph <- grViz("
digraph timeline {
  rankdir = LR;
  node [shape = box, style = filled, color = lightgray];

  FireActivity [label = 'Fire Activity']
  FireActivityLagged [label = 'Lagged Fire Activity']
  LULCChange [label = 'LULC Change\n(Forest to Agriculture)']
  HabitatLoss [label = 'Habitat Loss / Fragmentation']
  BiodiversityLoss [label = 'Reservoir Species\nDominance']
  ZoonoticSpillover [label = 'Zoonotic Spillover']

  FireActivity -> LULCChange [label = 'immediate effect']
  FireActivityLagged -> HabitatLoss [label = 'delayed effect (months/years)']
  LULCChange -> HabitatLoss
  HabitatLoss -> BiodiversityLoss
  BiodiversityLoss -> ZoonoticSpillover

  FireActivity -> FireActivityLagged [label = 'affects future fire activity']
}
")

##################################
###Combine both into one figure###
# Load the images as raster objects
cor_plot <- rasterGrob(readPNG("corplots.png"))
lag_plot <- rasterGrob(readPNG("lagmed.png"))
conceptual_plot <- rasterGrob(readPNG("visplot.png"))

# Combine the plots into a top row and bottom plot
top_row <- arrangeGrob(
  grobs = list(cor_plot, lag_plot),
  ncol = 2,
  heights = unit(1, "npc")  # Ensure no space between top-row plots
)

# Combine top row with conceptual_plot, and REMOVE padding or margin between
combined_plot <- arrangeGrob(
  grobs = list(top_row, conceptual_plot),
  ncol = 1,
  heights = c(8, 1),  # Adjusting the ratio of the plots
  padding = unit(c(1, 0), "lines")  # Remove both horizontal and vertical spacing
)

# Save the combined plot
ggsave("combined_plots.jpeg", plot = combined_plot, width = 16, height = 12, dpi = 300, units = "in")

